** set path
global replicate_monitoring 

clear *

** fig 1
if 1 {
	
	** panel a 
	{
		import delimited using "$replicate_monitoring\data\coefficients_88101.csv", clear
		keep if geoid10==34017 & site_num==1003 & poc==1
		cap confirm variable coef_2_2
		if !_rc {
			rename coef_2 coef
			qui forv id = 2/1000 {
				rename coef_2_`id' coef_`id'
			}
			rename coef_true_2 coef_true_1
		}
		
		bys geoid10 site_num poc: gen eday=_n-30
			
		drop if eday>30
						
		rename coef_true_1 beta
		rename coef coef_1
		
		replace beta=beta+1
		
		tsset eday 
		tsegen ma3d=rowmean(beta F1.beta F2.beta)
		replace ma3d=. if eday>=29
		
		summ beta if (inrange(eday,-30,-20)|inrange(eday,20,30))
		local outmean=`r(mean)'
		
		summ beta if inrange(eday,-3,3)
		local inmean=`r(mean)'
		
		
		tw 	scatter beta eday, msymbo(circle) col(blue) msize(1) || ///
			line ma3d eday, col(black) lp(dash) lw(0.3) ///
			xline(0,lp(dot) lcol(black) lw(0.6)) ///
			graphregion(color(white)) legend(off) ///
			ylab(0.6(0.1)1.1,nogrid) ///
			ytitle("Capture rate") xtitle("Days since pollution alert") ///
			xlab(-30 -20 -10 0 10 20 30) xsize(4.5)
		
	}
	
	** panel b
	{
		import delimited using "$replicate_monitoring\data\coefficients_88101.csv", clear
		keep if geoid10==34017 & site_num==1003 & poc==1
		cap confirm variable coef_2_2
		if !_rc {
			rename coef_2 coef
			qui forv id = 2/1000 {
				rename coef_2_`id' coef_`id'
			}
			rename coef_true_2 coef_true_1
		}
		
		bys geoid10 site_num poc: gen eday=_n-30
			
		drop if eday>30
			
		rename coef_true_1 beta
		rename coef coef_1

		qui forv id = 1/1000 {
			gen outside_`id'=coef_`id' if !(inrange(eday,-10,10))
			replace coef_`id'=. if !(inrange(eday,-3,3))
		}
		gen outside_beta=beta  if !(inrange(eday,-10,10))
		replace beta=. if !(inrange(eday,-3,3))
	
		gcollapse (mean) coef_* beta outside_*, by(geoid10 site_num poc)
		
		qui forv id = 1/1000 {
			gen stat_`id'=coef_`id'-outside_`id'
		}
		gen true_stat=beta-outside_beta
		
		drop coef_* outside_*
		
		gen id = _n
		reshape long stat_, i(id) j(iter)
		
		summ true_stat
		local true `r(mean)'
		_pctile stat_, p(2.5 97.5)
		return list
		tw 	pcarrowi 0.15 -0.054 0.15 -0.074, col(black) lw(0.2) || ///
			pcarrowi 0.15 0.05 0.15 0.07, col(black) lw(0.2) || ///
			hist stat_, w(0.01) fraction lw(0.1) lcol(black) col(gs2%25) ///
			xline(`r(r1)' `r(r2)',lcol(black) lp(dot) lw(0.6)) ///
			xline(`true',lcol(blue) lw(0.6)) ///
			graphregion(color(white)) legend(off) ///
			ylab(0(0.05)0.15, nogrid) ///
			ytitle("Fraction") xtitle("Test statistic value") ///
			yline(0,lp(dot) lcol(black) lw(0.6)) ///
			text(0.15 0 "95% range", place(c)) ///
			text(0.075 -0.105 "JCF monitor", place(w) orient(vertical)) ///
			xlab(-0.2(0.1)0.2) xsize(4.5)
		
	}
	
}

** fig 2
if 1 {
	
	** prep pval & beta coefs files 
	if 1 {
		
		foreach param in 88101 81102 44201 42602 42401 42101 {
		
			import delimited using "$replicate_monitoring\data\coefficients_`param'.csv", clear
			
			cap confirm variable coef_2_2
			if !_rc {
				rename coef_2 coef
				qui forv id = 2/1000 {
					rename coef_2_`id' coef_`id'
				}
				rename coef_true_2 coef_true_1
			}
			
			bys geoid10 site_num poc: gen eday=_n-30
				
			drop if eday>30
				
			rename coef_true_1 beta
			rename coef coef_1

			saveold "$replicate_monitoring\data\coef_`param'.dta", replace
			
			* test stat: within/buffer/outside window mean diff
			qui forv id = 1/1000 {
				gen outside_`id'=coef_`id' if !(inrange(eday,-10,10))
				replace coef_`id'=. if !(inrange(eday,-3,3))
			}
			gen outside_beta=beta  if !(inrange(eday,-10,10))
			replace beta=. if !(inrange(eday,-3,3))
			
			gcollapse (mean) coef_* beta outside_*, by(geoid10 site_num poc)
			
			qui forv id = 1/1000 {
				gen stat_`id'=coef_`id'-outside_`id'
			}
			gen true_stat=beta-outside_beta
			
			drop coef_* outside_*
			
			gen id = _n
			reshape long stat_, i(id) j(iter)
				
			gen d_l=(stat_<=true_stat)
			gen d_u=(stat_>=true_stat)
			
			gcollapse (mean) d_l d_u (first) beta true_stat, by(geoid10 site_num poc)
			gen p_2tail=2*min(d_l,d_u)
					
			keep geoid10 site_num poc true_stat p_2tail
			gen parametercode =`param'
			
			saveold "$replicate_monitoring\data\p_`param'.dta", replace
			
			keep geoid10 site_num poc parametercode p_2tail
			merge 1:m geoid10 site_num poc using "$replicate_monitoring\data\coef_`param'.dta", keepusing(eday beta) assert(match) nogen
			erase "$replicate_monitoring\data\coef_`param'.dta"
			saveold "$replicate_monitoring\data\coef_p_`param'.dta", replace
		
		}
		*** end of monitor loop
		
		clear
		foreach param in 88101 81102 44201 42602 42401 42101 {
			append using "$replicate_monitoring\data\p_`param'.dta"
		}
		
		* !!! use M Anderson codes "./code/fdr_qvalues.do" to add BH qval 
		if 0 {
			saveold "$replicate_monitoring\data\p_all.dta", replace
		}
		
	}
	
	use "$replicate_monitoring\data\p_all.dta", replace
	
	** panel a
	tw 	pcarrowi 0.042 1 0.03 1 , col(black) || ///
		hist p_2tail if p_2tail<=1, frac bin(40) lw(0.1)  ///
		lcol(teal) fcol(teal%50) ylab(0(0.05)0.2, nogrid) ///
		ytitle("Fraction") ///
		xtitle("{it:p}-value") legend(off) ///
		graphregion(color(white)) yline(0.025, lcol(black) lp(dash) lw(0.2)) ///
		text(0.05 1 "U(0,1)", place(c) just(center) ) ///
		xsize(4.5)
	
	** panel b
	tw 	pcarrowi 0.042 1 0.03 1 , col(black) || ///
		hist p_2tail if p_2tail<=1 & true_stat>0, frac lw(0) col(gs3%20) bin(40) || ///
		hist p_2tail if p_2tail<=1 & true_stat<=0, frac lw(0.1) lcol(dkgreen) fcol(none) ///
		ytitle("Fraction") ylab(0(0.05)0.2, nogrid)  ///
		bin(40) ylab(0(0.05)0.2) ///
		xtitle("{it:p}-value") legend(symxsize(*0.3) ring(0) pos(2) col(1) region(lcol(white)) order(3 `""Correct"-signed estimates"' 2 `""Wrong"-signed estimates"')) ///
		graphregion(color(white)) yline(0.025, lcol(black) lp(dash)) ///
		text(0.05 1 "U(0,1)", place(c) just(center) ) ///
		xsize(4.5)
		
}

** fig 3
if 1 {		
		
	clear
	foreach param in 88101 81102 44201 42602 42401 42101 {
		append using "$replicate_monitoring\data\coef_p_`param'.dta"
	}
	
	merge m:1 geoid10 site_num poc parametercode using "$replicate_monitoring\data\ls_monitors.dta", keep(match master) nogen
	merge m:1 geoid10 site_num poc parametercode using "$replicate_monitoring\data\p_all.dta", keepusing(bh) assert(match) nogen
	
	gen d_sig=(p_2tail<=0.05)
	
	* "interesting" vs other monitors
	foreach param in 88101 81102 44201 42602 42401 42101 {
		
		* p gram
		preserve
		keep if parametercode==`param'
			bys geoid10 site_num poc: keep if _n==1
			
			egen g_p=cut(p_2tail), at(0(0.025)1)
			gen N=1
			collapse (count) N, by(g_p)
			drop if mi(g_p)
			summ N
			gen frac = N/r(sum)
			
			tw  bar frac g_p if g_p<=0.05, barw(0.025) lw(0.1) col(blue%60) || ///
				bar frac g_p if g_p>0.05, barw(0.025) lw(0.01) lcol(gs7%20) col(gs7%20)  ///
				ylab(0 0.1, nogrid) ///
				ytitle("Frac.") ///
				xtitle("{it:p}-value") legend(off) ///
				graphregion(color(white)) yline(0.025, lcol(black) lp(dash) lw(0.2)) ///
				fysize(20) xsize(4.5)
			gr save _p_`param'.gph, replace
		restore
		
		* beta gram
		preserve
		keep if parametercode==`param'
			collapse (mean) beta [aw=N_warning], by(eday d_sig)
			summ beta if d_sig==0 & (inrange(eday,-30,-20))
			replace beta=beta-r(mean) if d_sig==0
			summ beta if d_sig==1 & (inrange(eday,-30,-20))
			replace beta=beta-r(mean) if d_sig==1
			
			tsset d_sig eday 
			tsegen ma3d=rowmean(beta F1.beta F2.beta)
			replace ma3d=. if eday>=29
			

			if `param'==88101 {
				local pollname PM{subscript:2.5}
				local texty 0.02
			}
			else if `param'==81102 {
				local pollname PM{subscript:10}
				local texty 0.03
			}
			else if `param'==44201 {
				local pollname O{subscript:3}
				local texty 0.02
			}
			else if `param'==42602 {
				local pollname NO{subscript:2}
				local texty 0.02
			}
			else if `param'==42401 {
				local pollname SO{subscript:2}
				local texty 0.02
			}
			else if `param'==42101 {
				local pollname CO
				local texty 0.02
			}
			
			tw 	scatter beta eday if d_sig==0, mlcol(gs11) mfcol(white) || ///
				scatter beta eday if d_sig==1, msize(1.3) msymbol(triangle) mlcol(blue) mfcol(white) || ///
				line ma3d eday if d_sig==0, lcol(gs9) lp(dash) || ///
				line ma3d eday if d_sig==1, lcol(blue) ///
				xline(0,lp(dot) lcol(black) lw(0.6)) ylab(-0.06(0.02)0.02,nogrid) ///
				graphregion(color(white)) legend(off) ///
				ytitle("Change in capture rate") xtitle("Days since pollution alert") ///
				text(`texty' -30 "`pollname'", size(vlarge) place(se) box just(left) fcolor(white) lw(0.4) margin(medsmall)) ///
				xlab(-30 -20 -10 0 10 20 30) xsize(4.5)
			gr save _beta_`param'.gph, replace
			
		restore
		
		gr 	combine _beta_`param'.gph _p_`param'.gph, ///
			imargin(0 0 0 0) ///
			graphregion(col(white) margin(vsmall)) ///
			cols(1) xsize(4)
	
	}
	
}

** fig 4
if 1 {
	** see website: https://www.google.com/maps/d/viewer?mid=1e6vuA_OXa-QfCMrYanwkWV7XiGl50d1q&ll=38.52680008919964%2C-95.88026400000001&z=5
}

** fig 5
if 1 {
	
	** estimation 
	use "$replicate_monitoring\data\pm25_monitor_dv_capture_2001_2015.dta", clear 
	bys site year qtr: keep if _n==1
	gen geoid10=statecode+countycode
	destring geoid10, replace
	
	by site year : gen annual_required = sum(required)
	by site year : replace annual_required = annual_required[_N]
	gen d_1in1 = (annual_required==365|annual_required==366) if !mi(annual_required)
	
	gen rfrnc_qos = year*100+qtr 
	
	egen site_ID = group(site)
	
	gegen g1_annual_mean=cut(annual_mean), at(7(1)16)
	replace g1_annual_mean = 7 if annual_mean < 7 & !mi(annual_mean)
	replace g1_annual_mean = 16 if annual_mean > 16 & !mi(annual_mean)
	
	gen site_num=substr(site,6,4)
	destring site_num , replace 
	gen parametercode=88101
	
	merge m:1 geoid10 site_num parametercode using "$replicate_monitoring\data\p_all_site.dta", keep(match master) nogen
	gen d_p05 = (p_2tail<=0.05) if !mi(p_2tail)
	
	tsset site_ID rfrnc_qos
	
	reghdfe capture i.year i.g1_annual_mean if d_1in1==1 & d_p05==1, a(site_ID) 
	reghdfe capture i.year i.g1_annual_mean if d_1in1==1 & !(d_p05==1), a(site_ID) 
	
	
	** plot 
	import excel using "$replicate_monitoring\data\capture.xlsx", sheet("capture") clear first 
	
	lab define g_lb 7 "< 8" 16 "> 16", replace
	lab value g g_lb
	
	tw 	rcap l1 u1 g, lcol(blue) || ///
		scatter b1 g, msymbol(triangle) mlcol(blue) mfcol(white) || ///
		scatteri 0.1 6.5 0.1 16.5, recast(line) lcol(white) ///
		graphregion(color(white)) legend(off) ///
		ylab(-0.5(0.1)0.1, nogrid) yline(0,lcol(black) lw(0.6) lp(dot)) ///
		xlab(7(1)16, valuelabel) ///
		ytitle("Change in capture rate") xtitle("Annual PM{subscript:2.5} value (ug/m3)") ///
		xsize(4.5)
	
	tw 	rcap l0 u0 g, lcol(gs7) || ///
		scatter b0 g, mlcol(gs7) mfcol(white) || ///
		scatteri 0.1 6.5 0.1 16.5, recast(line) lcol(white) ///
		graphregion(color(white)) legend(off) ///
		ylab(-0.5(0.1)0.1, nogrid) yline(0,lcol(black) lw(0.6) lp(dot)) ///
		xlab(7(1)16, valuelabel) ///
		ytitle("Change in capture rate") xtitle("Annual PM{subscript:2.5} value (ug/m3)") ///
		xsize(4.5)
	
}

** fig 6 
if 1 {
	
	** panel a, left
	use "$replicate_monitoring\data\imputation.dta", clear 
	gen d_p05 = (p_2tail<=0.05) if !mi(true_stat)
	keep if parametercode==88101
	
	tw 	kdensity conc_impute if mi(conc_true) & !mi(conc_impute) & d_p05==1, col(blue) range(0 50) || ///
		kdensity conc_impute if !mi(conc_true) & !mi(conc_impute) & d_p05==1, col(blue) lp(dash) range(0 50) || ///
		kdensity conc_true   if !mi(conc_true) & !mi(conc_impute) & d_p05==1, col(gs11) lp(dash) range(0 50) || ///
		scatteri 0.106 15 0 15, recast(line) lp(dot) lcol(black) lw(0.5) || ///
		scatteri 0.106 35 0 35, recast(line) lp(dot) lcol(black) lw(0.5)  ///
		legend(col(1) order(3 "Observed value, when not missing monitoring" 2 "Predicted value, when not missing monitoring" 1 "Predicted value, when missing monitoring")) ///
		graphregion(col(white)) ///
		ylab(0(0.03)0.12, nogrid) ///
		ytitle("Density") ///
		xtitle("PM{subscript:2.5} (ug/m3)") ///
		text(0.12 15 "Long-term", place(c) just(center) ) ///
		text(0.12 35 "Short-term", place(c) just(center) ) ///
		text(0.113 15 "standard", place(c) just(center) ) ///
		text(0.113 35 "standard", place(c) just(center) ) ///
		xsize(4.5)
	
	** panel a, right
	tw 	kdensity conc_impute if mi(conc_true) & !mi(conc_impute) & !(d_p05==1), col(blue) range(0 50) || ///
		kdensity conc_impute if !mi(conc_true) & !mi(conc_impute) & !(d_p05==1), col(blue) lp(dash) range(0 50) || ///
		kdensity conc_true   if !mi(conc_true) & !mi(conc_impute) & !(d_p05==1), col(gs11) lp(dash) range(0 50) || ///
		scatteri 0.106 15 0 15, recast(line) lp(dot) lcol(black) lw(0.5) || ///
		scatteri 0.106 35 0 35, recast(line) lp(dot) lcol(black) lw(0.5)  ///
		legend(col(1) order(3 "Observed value, when not missing monitoring" 2 "Predicted value, when not missing monitoring" 1 "Predicted value, when missing monitoring")) ///
		graphregion(col(white)) ///
		ylab(0(0.03)0.12, nogrid) ///
		ytitle("Density") ///
		xtitle("PM{subscript:2.5} (ug/m3)") ///
		text(0.12 15 "Long-term", place(c) just(center) ) ///
		text(0.12 35 "Short-term", place(c) just(center) ) ///
		text(0.113 15 "standard", place(c) just(center) ) ///
		text(0.113 35 "standard", place(c) just(center) ) ///
		xsize(4.5)

	** panel b, left	
	use "$replicate_monitoring\data\imputation_sat.dta", clear
	
	tw 	kdensity conc_sat if mi(conc_true) & !mi(conc_sat) & d_p05==1, col(teal) range(0 50) || ///
		kdensity conc_sat if !mi(conc_true) & !mi(conc_sat) & d_p05==1, col(teal) lp(dash) range(0 50) || ///
		kdensity conc_true   if !mi(conc_true) & !mi(conc_sat) & d_p05==1, col(gs11) lp(dash) range(0 50) || ///
		scatteri 0.106 15 0 15, recast(line) lp(dot) lcol(black) lw(0.5) || ///
		scatteri 0.106 35 0 35, recast(line) lp(dot) lcol(black) lw(0.5)  ///
		legend(col(1) order(3 "Observed value, when not missing monitoring" 2 "Predicted value, when not missing monitoring" 1 "Predicted value, when missing monitoring")) ///
		graphregion(col(white)) ///
		ylab(0(0.03)0.12, nogrid) ///
		ytitle("Density") ///
		xtitle("PM{subscript:2.5} (ug/m3)") ///
		text(0.12 15 "Long-term", place(c) just(center) ) ///
		text(0.12 35 "Short-term", place(c) just(center) ) ///
		text(0.113 15 "standard", place(c) just(center) ) ///
		text(0.113 35 "standard", place(c) just(center) ) ///
		xsize(4.5)
	
	** panel b, right
	tw 	kdensity conc_sat if mi(conc_true) & !mi(conc_sat) & !(d_p05==1), col(teal) range(0 50) || ///
		kdensity conc_sat if !mi(conc_true) & !mi(conc_sat) & !(d_p05==1), col(teal) lp(dash) range(0 50) || ///
		kdensity conc_true   if !mi(conc_true) & !mi(conc_sat) & !(d_p05==1), col(gs11) lp(dash) range(0 50) || ///
		scatteri 0.106 15 0 15, recast(line) lp(dot) lcol(black) lw(0.5) || ///
		scatteri 0.106 35 0 35, recast(line) lp(dot) lcol(black) lw(0.5)  ///
		legend(col(1) order(3 "Observed value, when not missing monitoring" 2 "Predicted value, when not missing monitoring" 1 "Predicted value, when missing monitoring")) ///
		graphregion(col(white)) ///
		ylab(0(0.03)0.12, nogrid) ///
		ytitle("Density") ///
		xtitle("PM{subscript:2.5} (ug/m3)") ///
		text(0.12 15 "Long-term", place(c) just(center) ) ///
		text(0.12 35 "Short-term", place(c) just(center) ) ///
		text(0.113 15 "standard", place(c) just(center) ) ///
		text(0.113 35 "standard", place(c) just(center) ) ///
		xsize(4.5)
	
}

** tab 1 
if 1 {
	
	use "$replicate_monitoring\data\p_all.dta", replace
	isid geoid site_num poc parametercode
	
	* sig lvls 
	gen d_p05 = (p_2tail<=0.05)
	gen d_q05 = (bh95_qval<=0.05)
	gen d_p10 = (p_2tail<=0.10)
	gen d_q10 = (bh95_qval<=0.10)
	
	* dip or spike
	gen d_dip = (true_stat<0)
	
	* monitor Xs
	merge 1:1 geoid10 site_num poc parametercode using "$replicate_monitoring\data\ls_monitors.dta", keep(match master) nogen
	
	* county Xs
	gen countyfip=geoid
	tostring countyfip, gen(str_countyfip)
	replace str_countyfip="0"+str_countyfip if strlen(str_countyfip)==4
	gen statefip=substr(str_countyfip,1,2)
	destring statefip, replace
	
	rename geoid10 geoid
	merge m:1 geoid using "$replicate_monitoring\data\ever_natt_county.dta", keep(match master) nogen keepusing(ever_natt)
	replace ever_natt=0 if mi(ever_natt) 
	
	merge m:1 statefip using  "$replicate_monitoring\data\PIN_convict_state_avg.dta", keep(match master) nogen
	merge m:1 statefip using  "$replicate_monitoring\data\LCV_state_avg.dta", keep(match master) nogen
	merge m:1 statefip using  "$replicate_monitoring\data\gallup_party_state.dta", keep(match master) nogen
	merge m:1 countyfip using "$replicate_monitoring\data\BEA_gov_emp_allgeo_share_avg.dta", keep(match master) nogen
	
	foreach var in N_convict_pc LCVavg f_gov_cnty f_dem {
		summ `var', d
		gen abovemed_`var'=(`var'>=r(p50))
	}
	
	** c1-c4
	reg d_p05 ever_natt
	reg d_p05 d_dip c.ever_natt#bN.d_dip
	reg d_p05 d_dip c.ever_natt#bN.d_dip abovemed*
	reg d_p05 d_dip c.ever_natt#bN.d_dip, a(statefip)
	
	** c5-c8
	reg d_q05 ever_natt
	reg d_q05 d_dip c.ever_natt#bN.d_dip
	reg d_q05 d_dip c.ever_natt#bN.d_dip abovemed*
	reg d_q05 d_dip c.ever_natt#bN.d_dip, a(statefip)
}
